cap_turn <- paste0(
"The hypothesis is that sites located at the middle of the stream network
will experience the more composition change through time because of two
opposites constraints. Climate change is expected to drive species upward in
the stream network but an opposite constrain gathered into hydrological
contrain, such as water flowand obstacles to migration will restrain the
upward migration. The sum of those constrains is hypothesized to result in a
quadratic relationship between jaccard turnover and distance from source.")
knitr::include_graphics("fig/turnover_hyp.png")
Figure 1.1: The hypothesis is that sites located at the middle of the stream network will experience the more composition change through time because of two opposites constraints. Climate change is expected to drive species upward in the stream network but an opposite constrain gathered into hydrological contrain, such as water flowand obstacles to migration will restrain the upward migration. The sum of those constrains is hypothesized to result in a quadratic relationship between jaccard turnover and distance from source.
dis_cap <- paste0("If sites within basin becomes more dissimilar from their
reference (first year of observation), we
should observe at the same time an homogeneization of sites at the basin scale")
ggplot(tibble(x = c(0, .1)), aes(x)) +
stat_function(fun = function(x) x, geom = "line") +
labs(
x = "Average temporal dissimalitiry within basin",
y = "Temporal similarity at the basin scale"
)
Figure 1.2: If sites within basin becomes more dissimilar from their reference (first year of observation), we should observe at the same time an homogeneization of sites at the basin scale
loc <- filtered_dataset$location %>%
left_join(filtered_dataset$site_quali, by = "siteid") %>%
st_as_sf(coords = c("longitude", "latitude"),
crs = 4326)
fig_sites <- paste0(
"Australia absence because span is superior to 10 years of 100s of sites but number of samplings < 10.")
world <- ne_countries(scale = "medium", returnclass = "sf") %>%
st_transform(crs = 4326)
bb <- st_bbox(loc)
ggplot(data = world) +
geom_sf() +
geom_sf(data = loc, aes(color = protocol), shape = 1) +
# coord_sf(xlim = bb[c("xmin", "xmax")],
# ylim = bb[c("ymin", "ymax")],
# datum = 4326, expand = TRUE) +
theme(legend.position = "bottom") +
labs(title = paste0("Number of sites: ", nrow(loc)))
Figure 2.1: Australia absence because span is superior to 10 years of 100s of sites but number of samplings < 10.
We used also hill numbers with coverage-based correction proposed by (???)
skim(analysis_dataset, starts_with("chao_"), "species_nb", "shannon", "simpson",
"evenness")
| Name | analysis_dataset |
| Number of rows | 62174 |
| Number of columns | 57 |
| _______________________ | |
| Column type frequency: | |
| numeric | 8 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| chao_richness | 0 | 1 | 5.12 | 5.45 | 1 | 1.94 | 3.14 | 6.44 | 85.71 | ▇▁▁▁▁ |
| chao_shannon | 0 | 1 | 2.79 | 2.59 | 0 | 1.35 | 2.13 | 3.63 | 33.63 | ▇▁▁▁▁ |
| chao_simpson | 0 | 1 | 2.23 | 1.92 | 0 | 1.20 | 1.79 | 2.89 | 40.00 | ▇▁▁▁▁ |
| chao_evenness | 0 | 1 | 1.93 | 1.54 | 0 | 1.47 | 2.06 | 2.47 | 38.25 | ▇▁▁▁▁ |
| species_nb | 0 | 1 | 5.60 | 5.92 | 1 | 2.00 | 3.00 | 7.00 | 72.00 | ▇▁▁▁▁ |
| shannon | 0 | 1 | 0.86 | 0.65 | 0 | 0.34 | 0.76 | 1.29 | 3.54 | ▇▆▃▁▁ |
| simpson | 0 | 1 | 0.42 | 0.28 | 0 | 0.17 | 0.48 | 0.66 | 0.96 | ▇▅▇▇▃ |
| evenness | 0 | 1 | 0.54 | 0.30 | 0 | 0.33 | 0.63 | 0.77 | 1.00 | ▅▂▅▇▅ |
Betadiversity metrics compares the composition of two communities. In our case, we compare the composition of each time step relatively to the baseline, i.e. the first year of sampling.
Relative abundance: \(SER_a = \dfrac{\sum_i (p_i - p^{\prime}_i)^2}{\sum_i p_i^2 + \sum_i p^{\prime2}_i - \sum_i p_i p^{\prime}_i}\)
\(J\): Jaccard index
Turnover metrics based on presence/absence can be decomposed in appearance/disappearance and nestedness / turnover.
appearance: \(\dfrac{S_{imm}}{S_{tot}}\), disappearance: \(\dfrac{S_{lost}}{S_{tot}}\)
Nestedness: \(SER_r - J_t\)
skim(analysis_dataset, "jaccard", "turnover", "nestedness", "appearance",
"disappearance", "hillebrand")
| Name | analysis_dataset |
| Number of rows | 62174 |
| Number of columns | 57 |
| _______________________ | |
| Column type frequency: | |
| numeric | 6 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| jaccard | 0 | 1 | 0.65 | 0.27 | 0 | 0.50 | 0.67 | 1.00 | 1.00 | ▁▃▇▆▇ |
| turnover | 0 | 1 | 0.16 | 0.25 | 0 | 0.00 | 0.00 | 0.33 | 1.00 | ▇▂▁▁▁ |
| nestedness | 0 | 1 | 0.19 | 0.22 | 0 | 0.00 | 0.10 | 0.33 | 0.96 | ▇▂▂▁▁ |
| appearance | 0 | 1 | 0.14 | 0.16 | 0 | 0.00 | 0.10 | 0.24 | 0.92 | ▇▃▁▁▁ |
| disappearance | 0 | 1 | 0.11 | 0.14 | 0 | 0.00 | 0.00 | 0.17 | 0.86 | ▇▂▁▁▁ |
| hillebrand | 0 | 1 | 0.68 | 0.33 | 0 | 0.42 | 0.79 | 0.99 | 1.00 | ▂▂▂▂▇ |
tar_load(riveratlas_site)
riv <- riveratlas_site %>%
select(all_of(c("siteid", setNames(get_river_atlas_significant_var(), NULL)))) %>%
mutate(across(starts_with("tmp_"), ~.x / 10)) %>%
st_drop_geometry()
riv %>%
rename(get_river_atlas_significant_var()) %>%
pivot_longer(cols = -siteid, names_to = "variable", values_to = "values") %>%
ggplot(aes(x = values)) +
geom_histogram() +
facet_wrap(~variable, scales = "free")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
skim(modelling_data)
| Name | modelling_data |
| Number of rows | 54626 |
| Number of columns | 18 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| factor | 1 |
| numeric | 16 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| siteid | 0 | 1 | 2 | 6 | 0 | 4916 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| main_bas | 0 | 1 | FALSE | 291 | 708: 2696, 208: 2400, 208: 2252, 208: 1566 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| year | 0 | 1 | 2004.86 | 8.57 | 1955.0 | 1999.00 | 2006.00 | 2012.00 | 2019.00 | ▁▁▂▇▇ |
| year_nb | 0 | 1 | 11.17 | 8.66 | 0.0 | 4.00 | 10.00 | 17.00 | 63.00 | ▇▃▁▁▁ |
| scaled_dist_up_km | 0 | 1 | 0.00 | 1.00 | -0.4 | -0.35 | -0.28 | -0.11 | 13.99 | ▇▁▁▁▁ |
| span | 0 | 1 | 22.45 | 8.46 | 10.0 | 16.00 | 22.00 | 28.00 | 64.00 | ▇▇▂▁▁ |
| jaccard_scaled | 0 | 1 | 0.65 | 0.27 | 0.0 | 0.50 | 0.66 | 1.00 | 1.00 | ▁▅▇▆▇ |
| turnover | 0 | 1 | 0.16 | 0.25 | 0.0 | 0.00 | 0.00 | 0.33 | 1.00 | ▇▂▁▁▁ |
| nestedness | 0 | 1 | 0.19 | 0.22 | 0.0 | 0.00 | 0.10 | 0.33 | 0.96 | ▇▂▂▁▁ |
| species_nb | 0 | 1 | 5.88 | 6.05 | 1.0 | 2.00 | 4.00 | 7.00 | 72.00 | ▇▁▁▁▁ |
| log_species_nb | 0 | 1 | 1.38 | 0.87 | 0.0 | 0.69 | 1.39 | 1.95 | 4.28 | ▇▇▆▂▁ |
| chao_richness | 0 | 1 | 5.36 | 5.56 | 1.0 | 1.96 | 3.37 | 6.90 | 85.71 | ▇▁▁▁▁ |
| hillebrand | 0 | 1 | 0.67 | 0.33 | 0.0 | 0.41 | 0.78 | 0.99 | 1.00 | ▂▂▂▂▇ |
| nestedness_scaled | 0 | 1 | 0.19 | 0.22 | 0.0 | 0.00 | 0.10 | 0.33 | 0.96 | ▇▂▂▁▁ |
| turnover_scaled | 0 | 1 | 0.16 | 0.25 | 0.0 | 0.00 | 0.00 | 0.33 | 1.00 | ▇▂▁▁▁ |
| hillebrand_scaled | 0 | 1 | 0.67 | 0.33 | 0.0 | 0.41 | 0.78 | 0.99 | 1.00 | ▂▂▂▂▇ |
| log1_year_nb | 0 | 1 | 2.15 | 0.96 | 0.0 | 1.61 | 2.40 | 2.89 | 4.16 | ▃▃▇▇▁ |
| one | 0 | 1 | 1.00 | 0.00 | 1.0 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▇▁▁ |
In each site and at each sampling point, we computed the dissimilarity (Jaccard, Appearance, Disappearance, Turnover & Nestedness) between the time step and the reference time, i.e. the first year of sampling. Regarding the reference time step, there are two different strategies. First is to include it, i.e. setting a dissimilarity of 0 at each beginning of a time series (Blowes et al. 2019). Another strategy is to exclude the first year from the analysis (Dornelas et al. 2014). The first option logically constrains the analysis toward increasing dissimilarity as the second can capture recovery from perturbation soon after the first samplings. I guess that the first option makes more sense more it removes to sensibility to community changes happening just after the references year.
Simple lm by site
Model all in one:
Jaccard similarity, nestedness and turnover were modelled in two ways, Beta and gaussian regression. Beta regressions are more appropriate for distribution borned by [0,1] and are linearized by a logit link. Logit links have the cons that coefficients are less easy to interpret. So we run both gaussian and beta regressions and investigate the relationship among coefficients of those type modelling types. Values of turnover were slightly transformed such as values of one and zero to fit beta regression requirements (\(x' = \dfrac{x \times (N - 1) + 0.5}{N}\)).
The model is defined like this:
paste0(
"jaccard_scaled ~
0 + year_nb * scaled_dist_up_km +
(0 + year_nb | main_bas/siteid) +
(0 + year_nb | span) +
(0 + year_nb:scaled_dist_up_km | main_bas)"
)
#> [1] "jaccard_scaled ~\n 0 + year_nb * scaled_dist_up_km +\n (0 + year_nb | main_bas/siteid) +\n (0 + year_nb | span) +\n (0 + year_nb:scaled_dist_up_km | main_bas)"
year_nb: number of year since the beginning of the monitoring (site scale)scaled_dist_up_km: Distance from the source (centered and scaled)
main_bas: Hydrographic basinsiteid: sitespan: Number of years between the end and the beginning of the monitoring
Our objective was to assess the temporal turnover and its ecological drivers. We
set intercept to equal 1.0 because year_nb = 0 at the beginning of each series,
jaccard is always equal to 1.0. We estimated the effects of the basin and of the
site on the temporal trends (0 + year_nb | main_bas/siteid), the site effect
being nested in the basin effect. We also evaluated the effects of span on the
temporal trends, because it is generally expected that longer timeseries
displays stronger temporal trends. Finally, we acknowledged that the effect of
the distance from source can be different among basin.
tar_load(var_analysis)
ti <- map_dfr(rigal_trends, ~tabyl_df(x = .x, group = "direction"),
.id = "response"
)
ti %>%
filter(direction != "Total", response %in% var_analysis) %>%
select(response, direction, percent) %>%
mutate(response = str_replace_all(response, get_var_replacement())) %>%
pivot_wider(names_from = "direction", values_from = "percent") %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "bordered", "hover"))
| response | stable | increase | decrease |
|---|---|---|---|
| Species richness | 81.1% | 11.5% | 5.7% |
| Log species richness | 79.5% | 11.1% | 5.2% |
| Chao species richness | 83.2% | 9.4% | 6.1% |
| SER_a (rel abundance) | 73.5% | 0.7% | 24.5% |
| Nestedness (jaccard) | 80.3% | 13.1% | 0.5% |
| Turnover (jaccard) | 53.9% | 12.2% | 0.2% |
ti <- map_dfr(rigal_trends, ~tabyl_df(x = .x, group = "shape_class"),
.id = "response"
)
ti %>%
filter(shape_class != "Total") %>%
select(response, shape_class, percent) %>%
mutate(response = str_replace_all(response, get_var_replacement())) %>%
pivot_wider(names_from = "shape_class", values_from = "percent") %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "bordered", "hover"))
| response | stable_constant | increase_constant | decrease_constant | stable_convex | stable_concave | decrease_decelerated | increase_accelerated | decrease_accelerated | increase_decelerated | NA_constant |
|---|---|---|---|---|---|---|---|---|---|---|
| Total abundance | 72.7% | 9.1% | 6.2% | 6.0% | 4.1% | 1.0% | 0.8% | 0.1% | 0.1% | NA |
| Log Total Turnover (jaccard) (codyn) abundance | 69.5% | 10.4% | 6.9% | 4.8% | 6.3% | 0.3% | 0.2% | 0.4% | 1.2% | 0.0% |
| Species richness | 70.7% | 9.5% | 4.8% | 4.3% | 6.0% | 0.7% | 0.8% | 0.3% | 1.1% | 1.8% |
| Log species richness | 69.3% | 9.3% | 4.4% | 3.4% | 6.9% | 0.5% | 0.7% | 0.3% | 1.1% | 4.3% |
| Chao species richness | 74.1% | 7.8% | 5.3% | 4.6% | 4.5% | 0.7% | 0.9% | 0.1% | 0.7% | 1.3% |
| Chao Shannon | 71.1% | 8.5% | 5.1% | 4.2% | 5.5% | 0.4% | 0.7% | 0.2% | 0.6% | 3.7% |
| Chao Simpson | 72.2% | 7.8% | 5.2% | 4.3% | 5.2% | 0.5% | 0.6% | 0.1% | 0.5% | 3.7% |
| Chao Evenness | 77.5% | 5.5% | 4.6% | 3.3% | 4.2% | 0.3% | 0.4% | 0.1% | 0.4% | 3.7% |
| Jaccard (binary, similarity) | 55.1% | 0.3% | 22.0% | 14.3% | 1.0% | 4.5% | NA | 0.7% | 0.3% | 1.8% |
| Horn (binary, similarity) | 58.7% | 0.3% | 22.1% | 11.9% | 1.1% | 3.1% | NA | 0.8% | 0.3% | 1.8% |
| Chao (binary, similarity) | 59.3% | 0.9% | 7.6% | 9.2% | 3.9% | 3.7% | NA | 1.3% | 1.7% | 12.4% |
| SER_a (rel abundance) | 60.3% | 0.5% | 19.5% | 11.8% | 1.5% | 3.9% | 0.0% | 1.2% | 0.1% | 1.3% |
| Total Turnover (jaccard) (codyn) | 56.4% | 22.1% | 0.3% | 0.8% | 11.1% | NA | 0.9% | NA | 3.0% | 5.5% |
| Appearance | 58.9% | 16.7% | 0.4% | 0.9% | 8.5% | NA | 1.2% | 0.0% | 1.8% | 11.6% |
| disAppearance | 53.0% | 12.5% | 0.2% | 1.1% | 7.1% | NA | 0.6% | NA | 1.7% | 23.7% |
| Evenness | 73.6% | 6.7% | 5.6% | 4.1% | 4.8% | 0.4% | 0.5% | 0.2% | 0.5% | 3.6% |
| Shannon | 70.3% | 8.9% | 5.6% | 4.0% | 5.7% | 0.3% | 0.6% | 0.2% | 0.7% | 3.7% |
| Simpson | 72.2% | 7.8% | 5.1% | 3.7% | 5.6% | 0.3% | 0.5% | 0.3% | 0.8% | 3.6% |
| Jaccard (binary, dissimilarity) | 53.0% | 22.0% | 0.3% | 0.7% | 13.7% | NA | 0.7% | NA | 4.2% | 5.5% |
| Nestedness (jaccard) | 71.7% | 11.2% | 0.5% | 1.0% | 7.6% | NA | 0.8% | NA | 1.1% | 6.2% |
| Turnover (jaccard) | 48.0% | 10.5% | 0.2% | 1.0% | 4.9% | NA | 0.6% | 0.0% | 1.1% | 33.7% |
slope <- map_dfr(rigal_trends,
~.x %>%
select(siteid, linear_slope),
.id = "response"
)
rigal_trends_df <- map2_dfr(
rigal_trends, names(rigal_trends),
~.x %>% mutate(variable = .y)
) %>%
select(variable, siteid, linear_slope) %>%
pivot_wider(names_from = "variable", values_from = "linear_slope")
summary_slope <- slope %>%
filter(response %in% var_analysis) %>%
group_by(response) %>%
summarise(summ = list(enframe(summary_distribution(linear_slope, na.rm = TRUE)))) %>%
unnest(cols = summ) %>%
pivot_wider(names_from = "name", values_from = "value") %>%
select(response, mean, median, sd)
summary_slope %>%
mutate(response = get_var_replacement()[response]) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "bordered", "hover")) %>%
scroll_box(width = "100%", height = "1000px")
| response | mean | median | sd |
|---|---|---|---|
| Chao species richness | 0.0160926 | 0.0027879 | 0.1864729 |
| SER_a (rel abundance) | -0.0184716 | -0.0134346 | 0.0216062 |
| Log species richness | 0.5690175 | 0.1305570 | 3.5608475 |
| Nestedness (jaccard) | 0.0090678 | 0.0053424 | 0.0154355 |
| Species richness | 0.0233072 | 0.0038939 | 0.1763604 |
| Turnover (jaccard) | 0.0089710 | 0.0022039 | 0.0161379 |
p_jt_sup_ne <- sum(abs(rigal_trends_df$turnover) > abs(rigal_trends_df$nestedness)) /
nrow(rigal_trends_df)
tar_load(gaussian_jaccard_tmb)
library(glmmTMB)
confint(gaussian_jaccard_tmb$mod[[1]], "year_nb", component = "cond")
#> 2.5 % 97.5 % Estimate
#> year_nb -0.029843 -0.02280809 -0.02632554
summary(gaussian_jaccard_tmb$mod[[1]])
#> Family: gaussian ( identity )
#> Formula: jaccard_scaled ~ 0 + year_nb/scaled_dist_up_km + (0 + year_nb |
#> main_bas/siteid) + (0 + year_nb | span) + (0 + year_nb:scaled_dist_up_km | main_bas)
#> Data: data
#> Offset: offset
#>
#> AIC BIC logLik deviance df.resid
#> 1884.6 1946.9 -935.3 1870.6 54619
#>
#> Random effects:
#>
#> Conditional model:
#> Groups Name Variance Std.Dev.
#> siteid.main_bas year_nb 1.581e-04 0.012575
#> main_bas year_nb 4.827e-05 0.006948
#> span year_nb 1.055e-04 0.010271
#> main_bas.1 year_nb:scaled_dist_up_km 8.619e-05 0.009284
#> Residual 5.210e-02 0.228258
#> Number of obs: 54626, groups: siteid:main_bas, 4916; main_bas, 291; span, 46
#>
#> Dispersion estimate for gaussian family (sigma^2): 0.0521
#>
#> Conditional model:
#> Estimate Std. Error z value Pr(>|z|)
#> year_nb -0.026326 0.001795 -14.669 < 2e-16 ***
#> year_nb:scaled_dist_up_km -0.007737 0.001651 -4.687 2.77e-06 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggpredict(gaussian_jaccard_tmb$mod[[1]], "year_nb")
#> # Predicted values of jaccard_scaled
#> # x = year_nb
#>
#> x | Predicted | 95% CI
#> -------------------------------
#> 0 | 1.00 | [ 1.00, 1.00]
#> 10 | 0.74 | [ 0.70, 0.77]
#> 20 | 0.47 | [ 0.40, 0.54]
#> 30 | 0.21 | [ 0.10, 0.32]
#> 40 | -0.05 | [-0.19, 0.09]
#> 50 | -0.32 | [-0.49, -0.14]
#> 60 | -0.58 | [-0.79, -0.37]
#> 70 | -0.84 | [-1.09, -0.60]
#>
#> Adjusted for:
#> * scaled_dist_up_km = -0.00
#> * siteid = NA (population-level)
#> * main_bas = NA (population-level)
#> * span = NA (population-level)
#> * offset = 1.00
rigal_trends_df %>%
ggplot(aes(x = jaccard , y = hillebrand)) +
geom_point() +
# geom_smooth(method = "gam") +
labs(x = "Jaccard trends (similarity, binary)", y = "Hillebrand trends
(similarity, relative abundance)")
rigal_trends_df_loc <- filtered_dataset$location %>%
left_join(rigal_trends_df, by = "siteid") %>%
st_as_sf(coords = c("longitude", "latitude"),
crs = 4326)
knitr::include_graphics(here("doc", "fig", "p_cor_slope_tot.png"))
ti <- expand.grid(
resp1 = unique(slope$response),
resp2 = unique(slope$response)
) %>%
filter(resp2 != resp1) %>%
filter(
resp1 %in% c("chao_richness", "species_nb", "log_species_nb",
"total_abundance", "log_total_abundance")) %>%
mutate_all(as.character) %>%
arrange(resp1)
test <- map2(ti$resp1, ti$resp2,
function(x, y) {
bi <- slope %>%
filter(response %in% c(x, y)) %>%
select(siteid, response, linear_slope) %>%
pivot_wider(names_from = "response", values_from = "linear_slope")
return(bi)
}
)
p_trends_trends <- map(test, function(x) {
l <- colnames(x)
x %>%
ggplot(aes(x = !!sym(l[2]), y = !!sym(l[3]))) +
geom_point() +
geom_smooth(method = "loess") +
labs(
x = get_var_replacement()[l[2]],
y = get_var_replacement()[l[3]]
)
})
names(p_trends_trends) <- map_chr(test, ~colnames(.x)[2])
plot_grid(
plotlist = p_trends_trends[names(p_trends_trends) %in% "log_species_nb"],
ncol = 3
)
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
#> `geom_smooth()` using formula 'y ~ x'
Cf other document
library(ade4)
library(factoextra)
#> Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
tar_load(rigal_slp_df)
slope_df <- rigal_slp_df
res.pca <- dudi.pca(select(slope_df, -siteid),
scannf = FALSE, # Hide scree plot
nf = 5 # Number of components kept in the results
)
fviz_eig(res.pca)
#> Registered S3 methods overwritten by 'car':
#> method from
#> influence.merMod lme4
#> cooks.distance.influence.merMod lme4
#> dfbeta.influence.merMod lme4
#> dfbetas.influence.merMod lme4
p_pca <- map(list(c(1,2), c(2, 3), c(1, 3)),
~fviz_pca_var(res.pca,
axes = .x,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
) +
theme(legend.position = "none") + labs(title = "")
)
plot_grid(plotlist = p_pca, ncol = 1)
#> Warning: ggrepel: 7 unlabeled data points (too many overlaps). Consider increasing max.overlaps
riv_data_pca <- riv %>%
select(-siteid) %>%
mutate(across(c(dist_up_km, dis_m3_pyr, urb_pc_cse, ele_mt_cav, pac_pc_cse),
~log(.x + 2))
) %>%
rename(get_river_atlas_significant_var()) %>%
na.omit()
pca_riv <- dudi.pca(df = scale(riv_data_pca), scannf = FALSE, nf = 3)
fviz_eig(pca_riv)
p_pca <- map(list(c(1,2), c(2, 3), c(1, 3)),
~fviz_pca_var(pca_riv,
axes = .x,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
) +
theme(legend.position = "none") + labs(title = "")
)
p_pca[[1]]
p_pca[[2]]
p_pca[[3]]
#### Environment - Trends
tar_load(slp_env)
slp_env_for_pca <- slp_env %>%
select(-all_of(c("siteid", "main_bas", "ecoregion", "geometry"))) %>%
rename(get_river_atlas_significant_var())
slp_env_for_pca$geometry <- NULL
pca_slp_env <- dudi.pca(
df = scale(na.omit(slp_env_for_pca)),
scannf = FALSE, nf = 3)
p_slp_env_pca <- map(list(c(1,2), c(2, 3), c(1, 3)),
~fviz_pca_var(pca_slp_env,
axes = .x,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
) +
theme(legend.position = "none") + labs(title = "")
)
p_slp_env_pca[[1]]
#> Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider increasing max.overlaps
p_slp_env_pca[[2]]
#> Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider increasing max.overlaps
p_slp_env_pca[[3]]
#> Warning: ggrepel: 11 unlabeled data points (too many overlaps). Consider increasing max.overlaps
tar_load(c(gaussian_jaccard_tmb, beta_jaccard_tmb))
tidy_gaus_jy <- gaussian_jaccard_tmb %>%
filter(year_var == "year_nb") %>%
.$mod
names(tidy_gaus_jy) <- gaussian_jaccard_tmb[
gaussian_jaccard_tmb$year_var == "year_nb",
]$var_jaccard
dw <- dwplot(tidy_gaus_jy,
vline = geom_vline(xintercept = 0, colour = "grey50", linetype = 2)) +
xlab("Coefficient")
dw
diagnose(gaussian_jaccard_tmb$mod[[1]])
#> model looks OK!
diagnose(gaussian_jaccard_tmb$mod[[3]])
#> model looks OK!
diagnose(gaussian_jaccard_tmb$mod[[5]])
#> Unusually large Z-statistics (|x|>5):
#>
#> year_nb:scaled_dist_up_km
#> -5.182309
#>
#> Large Z-statistics (estimate/std err) suggest a failure of the Wald approximation - often also associated with parameters that are at
#> or near the edge of their range (e.g. random-effects standard deviations approaching 0). While the Wald p-values and standard errors
#> listed in summary() are unreliable, profile confidence intervals (see ?confint.glmmTMB) and likelihood ratio test p-values derived by
#> comparing models (e.g. ?drop1) may still be OK. (Note that the LRT is conservative when the null value is on the boundary, e.g. a
#> variance or zero-inflation value of 0 (Self and Liang 1987; Stram and Lee 1994; Goldman and Whelan 2000); in simple cases the p-value
#> is approximately twice as large as it should be.)
pred <- ggemmeans(gaussian_jaccard_tmb$mod[[1]],
terms = c("year_nb", "scaled_dist_up_km [quart2]"),
type = "re")
#> NOTE: A nesting structure was detected in the fitted model:
#> scaled_dist_up_km %in% year_nb
plot(pred)
pred <- ggemmeans(gaussian_jaccard_tmb$mod[[3]],
terms = c("year_nb", "scaled_dist_up_km [quart2]"),
type = "re.zi")
#> NOTE: A nesting structure was detected in the fitted model:
#> scaled_dist_up_km %in% year_nb
plot(pred)
pred <- ggemmeans(gaussian_jaccard_tmb$mod[[5]],
terms = c("year_nb", "scaled_dist_up_km [quart2]"),
type = "re.zi")
#> NOTE: A nesting structure was detected in the fitted model:
#> scaled_dist_up_km %in% year_nb
#> Warning: Can't compute random effect variances. Some variance components equal zero. Your model may suffer from singularity (see '?lme4::isSingular' and
#> '?performance::check_singularity').
#> Solution: Respecify random structure! You may also decrease the 'tolerance' level to enforce the calculation of random effect variances.
plot(pred)
hist(coef(gaussian_jaccard_tmb$mod[[1]])$cond[["siteid:main_bas"]]$year_nb)
hist(coef(gaussian_jaccard_tmb$mod[[1]])$cond[["main_bas"]]$year_nb)
tar_load(filtered_dataset)
loc <- filtered_dataset$location %>%
st_as_sf(coords = c("longitude", "latitude"),
crs = 4326)
world <- ne_countries(scale = "medium", returnclass = "sf")
pred_slope <- gaussian_jaccard_tmb %>%
mutate(
site_slope = map(mod, function(m) {
coef(m)$cond[["siteid:main_bas"]] %>%
rownames_to_column(var = "siteid_main_bas") %>%
separate(
col = "siteid_main_bas",
into = c("siteid", "main_bas"),
sep = ":") %>%
select(-main_bas)
}),
basin_slope = map(mod, function(m) {
coef(m)$cond[["main_bas"]] %>%
rownames_to_column(var = "main_bas")
}),
) %>%
select(-mod)
pred_slope$basin_slope[[1]] %>%
as_tibble()
#> # A tibble: 291 × 3
#> main_bas year_nb `year_nb:scaled_dist_up_km`
#> <chr> <dbl> <dbl>
#> 1 1080023890 -0.0288 -0.00504
#> 2 1080040200 -0.0260 -0.00331
#> 3 2080008490 -0.0441 -0.00582
#> 4 2080016240 -0.0198 -0.0113
#> 5 2080016250 -0.0280 -0.00676
#> 6 2080016280 -0.0202 -0.00793
#> 7 2080016290 -0.0170 -0.0120
#> 8 2080016350 -0.0306 -0.00758
#> 9 2080016400 -0.0225 -0.00945
#> 10 2080016460 -0.0292 -0.00693
#> # … with 281 more rows
pred_slope <- pred_slope %>%
mutate(
sf_site = map(site_slope,
~left_join(.x, select(loc, siteid, geometry), by = "siteid") %>%
st_as_sf()
),
sf_basin = map(basin_slope,
~left_join(.x, loc %>%
mutate(main_bas = as.character(main_bas)) %>%
select(main_bas, geometry), by = "main_bas") %>%
st_as_sf()
),
sf_site_map = map2(sf_site, year_var,
~ggplot(data = world) +
geom_sf() +
geom_sf(data = .x, aes(color = year_nb), shape = 16) +
scale_color_viridis() +
theme(legend.position = "bottom")),
sf_basin_map = map2(sf_basin, year_var,
~ggplot(data = world) +
geom_sf() +
geom_sf(data = .x, aes(color = year_nb), shape = 16) +
scale_color_viridis() +
theme(legend.position = "bottom"))
)
pred_slope$sf_site_map[[1]] +
labs(title = "Jaccard")
pred_slope$sf_site_map[[3]] +
labs(title = "Turnover")
pred_slope$sf_site_map[[5]] +
labs(title = "Nestedness")
pred_slope$sf_basin_map[[1]] +
labs(title = "Jaccard")
pred_slope$sf_basin_map[[3]] +
labs(title = "Turnover")
pred_slope$sf_basin_map[[5]] +
labs(title = "Nestedness")
tar_load(gaussian_rich_tmb)
lry <- gaussian_rich_tmb$mod[[5]]
cry <- gaussian_rich_tmb$mod[[1]]
knitr::opts_chunk$set(eval = FALSE)
paste0(var_temporal_trends,
" ~ ",
"dist_up_km + tmp_dc_cyr +
(1 + dist_up_km + tmp_dc_cyr | main_bas)"
) %>%
as.formula
library(spaMM)
tar_load(c(trend_env))
names(trend_env) <- var_temporal_trends
ti_trends <- map(var_temporal_trends,
~fitme(
as.formula(paste0("scale(", .x, ") ~ dist_up_km + tmp_c_cyr + (1 | main_bas)")),
data = slp_env[,
colnames(slp_env) %in%
c(.x, "siteid", "dist_up_km", "tmp_c_cyr", "main_bas", "ecoregion")
]
)
)
names(ti_trends) <- var_temporal_trends
ci <- map_dfr(ti_trends[names(ti_trends) %in% c("log_total_abundance", "log_species_nb", "jaccard",
"appearance", "disappearance", "turnover",
"nestedness", "hillebrand")], function(v) {
x <- confint(v, c("dist_up_km", "tmp_c_cyr"), verbose = FALSE)
tmp <- map_dfr(x, function(x) {
tm <- x$interval
names(tm) <- c("lower", "upper")
return(tm)
}, .id = "parm"
)
return(tmp)
}, .id = "response")
ci %>%
kable(.,
caption = "Confidence interval (bootstrap)")
from_data_to_predict <- function(
model_list = NULL,
dataset = NULL,
re = NULL
) {
pred <- na.omit(dataset)
for (i in seq_along(names(model_list))) {
tmp <- predict(model_list[[i]],
re.form = as.formula(re),
intervals = "predVar"
)[,1]
names(tmp) <- NULL
pred[[names(model_list)[i]]] <- tmp
}
return(pred)
}
xx <- list(
tmp_c_cyr = seq(min(na.omit(slp_env$tmp_c_cyr)), max(na.omit(slp_env$tmp_c_cyr)), length.out = 10),
dist_up_km = seq(min(na.omit(slp_env$dist_up_km)), max(na.omit(slp_env$dist_up_km)), length.out = 10),
main_bas = unique(na.omit(slp_env$main_bas))
)
new_data <- expand.grid(xx) %>%
as_tibble()
pred <- predict(ti_trends[[1]], newdata = new_data, re.form = re, intervals="predVar")
int <- cbind(pred[,1], attr(pred, "intervals")) %>%
as_tibble() %>%
rename(y = V1)
xxx <- cbind(int, new_data) %>%
as_tibble
xxx %>%
ggplot(aes(y = y, x = dist_up_km, color = main_bas)) +
geom_point() +
theme(legend.position = "none")
re <- paste0("~ tmp_c_cyr + (1 | main_bas)")
pred_tmp <- from_data_to_predict(
model_list = ti_trends,
dataset = slp_env,
re = re
)
p_tmp <- map(
c(
"log_total_abundance", "log_species_nb", "jaccard",
"appearance", "disappearance", "turnover",
"nestedness", "hillebrand"),
function(x) {
pred_tmp %>%
ggplot(aes(
y = !!sym(x),
x = tmp_c_cyr,
color = main_bas
)) +
geom_line() +
theme(legend.position = "none")
}
)
plot_grid(plotlist = p_tmp, ncol = 3)
re <- paste0("~ dist_up_km + (1 | main_bas)")
pred <- na.omit(slp_env)
for (i in seq_along(var_temporal_trends)) {
tmp <- predict(trend_env[[i]],
re.form = as.formula(re)
)[, 1]
names(tmp) <- NULL
pred[[var_temporal_trends[i]]] <- tmp
}
p <- map(
c("log_total_abundance", "log_species_nb", "jaccard",
"appearance", "disappearance", "turnover",
"nestedness", "hillebrand"),
function(x) {
pred %>%
ggplot(aes(
y = !!sym(x),
x = dist_up_km,
color = as.factor(main_bas),
shape = as.factor(ecoregion)
)) +
geom_line() +
theme(legend.position = "none")
}
)
plot_grid(plotlist = p, ncol = 3)
re <- paste0("~ tmp_dc_cyr + (1 | main_bas)")
pred_tmp <- from_data_to_predict(
model_list = trend_env,
dataset = slp_env,
re = re
)
p_tmp <- map(
c("log_species_nb", "jaccard", "appearance", "disappearance", "turnover", "hillebrand"),
function(x) {
pred_tmp %>%
ggplot(aes(
y = !!sym(x),
x = tmp_dc_cyr,
color = as.factor(main_bas)
)) +
geom_line() +
theme(legend.position = "none")
}
)
plot_grid(plotlist = p_tmp, ncol = 3)
re <- paste0("~ dist_up_km + (1 + dist_up_km | ecoregion)")
pred_dist_ecoregion <- from_data_to_predict(
model_list = trend_env,
dataset = slp_env,
re = re
)
p_dist_ecoregion <- map(
c("log_species_nb", "jaccard", "appearance", "disappearance", "turnover", "hillebrand"),
function(x) {
pred_dist_ecoregion %>%
ggplot(aes(
y = !!sym(x),
x = dist_up_km,
color = as.factor(ecoregion)
)) +
geom_line() +
theme(legend.position = "none")
}
)
plot_grid(plotlist = p_dist_ecoregion, ncol = 3)
re <- paste0("~ tmp_dc_cyr + (1 + tmp_dc_cyr | ecoregion)")
pred_tmp_ecoregion <- from_data_to_predict(
model_list = trend_env,
dataset = slp_env,
re = re
)
p_tmp_ecoregion <- map(
c("log_species_nb", "jaccard", "appearance", "disappearance", "turnover", "hillebrand"),
function(x) {
pred_tmp_ecoregion %>%
ggplot(aes(
y = !!sym(x),
x = tmp_dc_cyr,
color = as.factor(main_bas)
)) +
geom_line() +
theme(legend.position = "none")
}
)
plot_grid(plotlist = p_tmp_ecoregion, ncol = 3)
Reproducibility receipt
## datetime
Sys.time()
## repository
if(requireNamespace('git2r', quietly = TRUE)) {
git2r::repository()
} else {
c(
system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
system2("git", args = c("remote", "-v"), stdout = TRUE)
)
}
## session info
sessionInfo()
Blowes, Shane A., Sarah R. Supp, Laura H. Antão, Amanda Bates, Helge Bruelheide, Jonathan M. Chase, Faye Moyes, et al. 2019. “The Geography of Biodiversity Change in Marine and Terrestrial Assemblages.” Science 366 (6463): 339–45. https://doi.org/10.1126/science.aaw1620.
Dornelas, Maria, Nicholas J. Gotelli, Brian McGill, Hideyasu Shimadzu, Faye Moyes, Caya Sievers, and Anne E. Magurran. 2014. “Assemblage Time Series Reveal Biodiversity Change but Not Systematic Loss.” Science 344 (6181): 296–99. https://doi.org/10.1126/science.1248484.
Hillebrand, Helmut, Bernd Blasius, Elizabeth T. Borer, Jonathan M. Chase, John A. Downing, Britas Klemens Eriksson, Christopher T. Filstrup, et al. 2018. “Biodiversity Change Is Uncoupled from Species Richness Trends: Consequences for Conservation and Monitoring.” Journal of Applied Ecology 55 (1): 169–84. https://doi.org/https://doi.org/10.1111/1365-2664.12959.
Klink, Roel van, Diana E. Bowler, Konstantin B. Gongalsky, Ann B. Swengel, Alessandro Gentile, and Jonathan M. Chase. 2020. “Meta-Analysis Reveals Declines in Terrestrial but Increases in Freshwater Insect Abundances.” Science 368 (6489): 417–20. https://doi.org/10.1126/science.aax9931.
Vellend, Mark, Lander Baeten, Isla H. Myers-Smith, Sarah C. Elmendorf, Robin Beauséjour, Carissa D. Brown, Pieter De Frenne, Kris Verheyen, and Sonja Wipf. 2013. “Global Meta-Analysis Reveals No Net Change in Local-Scale Plant Biodiversity over Time.” Proceedings of the National Academy of Sciences 110 (48): 19456–9.